package com.zzyl.common.utils;

import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.font.FontMapper;
import org.apache.pdfbox.text.PDFTextStripper;


import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Helpers for extracting plain text from PDF documents using Apache PDFBox.
 */
@Slf4j
public class PDFUtil {

    /**
     * Sample file used by {@link #main(String[])} when no path is supplied on the command line.
     */
    private static final String DEFAULT_SAMPLE_PDF =
            "D:\\00-java就业班\\项目一\\Day08. 智能评估-集成AI大模型\\资料\\体检报告样例\\体检报告-刘爱国-男-69岁.pdf";

    /**
     * Extracts the text content of the PDF read from the given stream.
     *
     * <p>The supplied stream is always closed by this method, whether extraction succeeds or fails.
     *
     * @param inputStream stream positioned at the start of a PDF document; must not be {@code null}
     * @return the text returned by {@link PDFTextStripper#getText(PDDocument)} for the document
     * @throws NullPointerException if {@code inputStream} is {@code null}
     * @throws RuntimeException     wrapping the underlying {@link IOException} when the document
     *                              cannot be loaded or read
     * @throws IOException          declared for source compatibility with existing callers; I/O
     *                              failures inside this method are rethrown as
     *                              {@link RuntimeException}
     */
    public static String pdfToString(InputStream inputStream) throws IOException {
        // Fail fast with a clear message instead of an opaque failure inside PDFBox.
        if (inputStream == null) {
            throw new NullPointerException("inputStream must not be null");
        }

        // Intended to disable font parsing (per the original author's note).
        // NOTE(review): this mutates a JVM-wide system property on every call — confirm that the
        // PDFBox version in use actually reads "pdfbox.fontParsing".
        System.setProperty("pdfbox.fontParsing", "false");

        // try-with-resources closes the document first, then the caller's stream.
        try (InputStream stream = inputStream;
             PDDocument document = PDDocument.load(stream)) {
            // A fresh stripper per call keeps this method free of shared mutable state.
            PDFTextStripper pdfStripper = new PDFTextStripper();
            return pdfStripper.getText(document);
        } catch (IOException e) {
            log.error("解析PDF文件异常", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Manual smoke test: prints the text extracted from a PDF file to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the PDF to read. When absent, the
     *             built-in sample path is used.
     * @throws IOException if the file cannot be opened
     */
    public static void main(String[] args) throws IOException {
        String pdfPath = (args != null && args.length > 0) ? args[0] : DEFAULT_SAMPLE_PDF;

        // pdfToString closes the stream as well; closing a FileInputStream twice is harmless.
        try (FileInputStream fileInputStream = new FileInputStream(pdfPath)) {
            String result = PDFUtil.pdfToString(fileInputStream);
            System.out.println(result);
        }
    }
}